HW: Label the entire notebook with comments describing what operation each code chunk performs and what the outcome is.
library(tidyverse)
library(plotly)
# Load the penguin measurements from the CSV file in the working directory.
Penguins <- read.csv(file = "penguins_size.csv")
We see that the data has 344 rows and 7 columns
# Print the data frame to inspect the raw records.
Penguins
# Report the dimensions as (number of rows, number of columns).
dim(Penguins)
[1] 344 7
Looking at summary stats
# Per-column summary of the raw data: quartiles, mean and NA counts for the
# numeric columns; only length/class/mode for the character columns.
summary(Penguins)
species island culmen_length_mm culmen_depth_mm flipper_length_mm
Length:344 Length:344 Min. :32.10 Min. :13.10 Min. :172.0
Class :character Class :character 1st Qu.:39.23 1st Qu.:15.60 1st Qu.:190.0
Mode :character Mode :character Median :44.45 Median :17.30 Median :197.0
Mean :43.92 Mean :17.15 Mean :200.9
3rd Qu.:48.50 3rd Qu.:18.70 3rd Qu.:213.0
Max. :59.60 Max. :21.50 Max. :231.0
NA's :2 NA's :2 NA's :2
body_mass_g sex
Min. :2700 Length:344
1st Qu.:3550 Class :character
Median :4050 Mode :character
Mean :4202
3rd Qu.:4750
Max. :6300
NA's :2
We notice that species, island and sex are read in as characters. We will convert them to factors.
# Convert the three text columns to factors in one step so that summary()
# reports a count per level instead of just the column length.
Penguins[c("species", "island", "sex")] <- lapply(
  Penguins[c("species", "island", "sex")],
  as.factor
)
Looking at summary again
# Summarise again: the factor columns now show a count for each level, which
# exposes the stray "." entry and the NA values in sex.
summary(Penguins)
species island culmen_length_mm culmen_depth_mm flipper_length_mm
Adelie :152 Biscoe :168 Min. :32.10 Min. :13.10 Min. :172.0
Chinstrap: 68 Dream :124 1st Qu.:39.23 1st Qu.:15.60 1st Qu.:190.0
Gentoo :124 Torgersen: 52 Median :44.45 Median :17.30 Median :197.0
Mean :43.92 Mean :17.15 Mean :200.9
3rd Qu.:48.50 3rd Qu.:18.70 3rd Qu.:213.0
Max. :59.60 Max. :21.50 Max. :231.0
NA's :2 NA's :2 NA's :2
body_mass_g sex
Min. :2700 . : 1
1st Qu.:3550 FEMALE:165
Median :4050 MALE :168
Mean :4202 NA's : 10
3rd Qu.:4750
Max. :6300
NA's :2
We will remove the rows with missing (NA) values and also the row with the erroneous "." value for sex
# Build the cleaned data set in a single pipeline: drop every row that has an
# NA in any column, then keep only the rows whose sex is FEMALE or MALE.
Clean_DF <- Penguins %>%
  na.omit() %>%
  filter(sex %in% c("FEMALE", "MALE"))
This leaves us with the below summary stats:
# Summarise the cleaned data to confirm that the NA rows are gone.
summary(Clean_DF)
species island culmen_length_mm culmen_depth_mm flipper_length_mm
Adelie :146 Biscoe :163 Min. :32.10 Min. :13.10 Min. :172
Chinstrap: 68 Dream :123 1st Qu.:39.50 1st Qu.:15.60 1st Qu.:190
Gentoo :119 Torgersen: 47 Median :44.50 Median :17.30 Median :197
Mean :43.99 Mean :17.16 Mean :201
3rd Qu.:48.60 3rd Qu.:18.70 3rd Qu.:213
Max. :59.60 Max. :21.50 Max. :231
body_mass_g sex
Min. :2700 Length:333
1st Qu.:3550 Class :character
Median :4050 Mode :character
Mean :4207
3rd Qu.:4775
Max. :6300
# Rebuild sex as a factor that carries only the levels still present in the
# data. factor() is used rather than as.factor() because as.factor() returns
# an existing factor unchanged, which would keep the now-empty "." level
# after the filter above; factor() drops unused levels and also accepts a
# character column, so it is correct whichever type sex has at this point.
Clean_DF$sex <- factor(Clean_DF$sex)
# Summarise the cleaned data; sex should now list only FEMALE and MALE.
summary(Clean_DF)
species island culmen_length_mm culmen_depth_mm flipper_length_mm
Adelie :146 Biscoe :163 Min. :32.10 Min. :13.10 Min. :172
Chinstrap: 68 Dream :123 1st Qu.:39.50 1st Qu.:15.60 1st Qu.:190
Gentoo :119 Torgersen: 47 Median :44.50 Median :17.30 Median :197
Mean :43.99 Mean :17.16 Mean :201
3rd Qu.:48.60 3rd Qu.:18.70 3rd Qu.:213
Max. :59.60 Max. :21.50 Max. :231
body_mass_g sex
Min. :2700 FEMALE:165
1st Qu.:3550 MALE :168
Median :4050
Mean :4207
3rd Qu.:4775
Max. :6300
# Side-by-side base-R boxplots of the three length/depth measurements
# (columns 3 to 5 of the data frame), selected by name for readability.
boxplot(Clean_DF[c("culmen_length_mm", "culmen_depth_mm", "flipper_length_mm")])
# Body mass is plotted on its own.
boxplot(Clean_DF[["body_mass_g"]])
# Interactive boxplots comparing each body measurement between the sexes.
# Each plot is built with ggplot2 and then converted to plotly.

# body_mass_g by sex
p <- ggplot(
  data = Clean_DF,
  mapping = aes(x = sex, y = body_mass_g, fill = sex)
) +
  geom_boxplot()
ggplotly(p)

# culmen_length_mm by sex
p <- ggplot(
  data = Clean_DF,
  mapping = aes(x = sex, y = culmen_length_mm, fill = sex)
) +
  geom_boxplot()
ggplotly(p)

# culmen_depth_mm by sex
p <- ggplot(
  data = Clean_DF,
  mapping = aes(x = sex, y = culmen_depth_mm, fill = sex)
) +
  geom_boxplot()
ggplotly(p)

# flipper_length_mm by sex
p <- ggplot(
  data = Clean_DF,
  mapping = aes(x = sex, y = flipper_length_mm, fill = sex)
) +
  geom_boxplot()
ggplotly(p)
Looking at body measures by Species
# Interactive boxplots comparing each body measurement across species.

# body_mass_g by species
p <- ggplot(
  data = Clean_DF,
  mapping = aes(x = species, y = body_mass_g, fill = species)
) +
  geom_boxplot()
ggplotly(p)

# culmen_length_mm by species
p <- ggplot(
  data = Clean_DF,
  mapping = aes(x = species, y = culmen_length_mm, fill = species)
) +
  geom_boxplot()
ggplotly(p)

# culmen_depth_mm by species
p <- ggplot(
  data = Clean_DF,
  mapping = aes(x = species, y = culmen_depth_mm, fill = species)
) +
  geom_boxplot()
ggplotly(p)

# flipper_length_mm by species
p <- ggplot(
  data = Clean_DF,
  mapping = aes(x = species, y = flipper_length_mm, fill = species)
) +
  geom_boxplot()
ggplotly(p)
# Bar chart of the number of penguins recorded for each species.
p <- ggplot(Clean_DF) +
  geom_bar(aes(x = species, fill = species))
ggplotly(p)

# Bar chart of the number of penguins per island, with each bar split
# (stacked) by species.
p <- ggplot(Clean_DF) +
  geom_bar(aes(x = island, fill = species))
ggplotly(p)
# Scatter plots of culmen length against the other three measurements.
# In every plot the point colour encodes species and the point shape
# encodes island.

# culmen_length_mm vs culmen_depth_mm
p <- ggplot(Clean_DF) +
  geom_point(
    aes(
      x = culmen_length_mm,
      y = culmen_depth_mm,
      color = species,
      shape = island
    )
  )
ggplotly(p)

# culmen_length_mm vs flipper_length_mm
p <- ggplot(Clean_DF) +
  geom_point(
    aes(
      x = culmen_length_mm,
      y = flipper_length_mm,
      color = species,
      shape = island
    )
  )
ggplotly(p)

# culmen_length_mm vs body_mass_g
p <- ggplot(Clean_DF) +
  geom_point(
    aes(
      x = culmen_length_mm,
      y = body_mass_g,
      color = species,
      shape = island
    )
  )
ggplotly(p)
library(corrplot)
# Pairwise correlation matrix of the four numeric measurements
# (columns 3 to 6 of the data frame), selected by name for readability.
M <- cor(
  Clean_DF[c(
    "culmen_length_mm",
    "culmen_depth_mm",
    "flipper_length_mm",
    "body_mass_g"
  )]
)
# Draw the matrix as coloured cells with the coefficients printed in white.
corrplot(M, method = "color", addCoef.col = "white")